*This do-file creates the dataset for the cross-national analysis
*it merges data from various sources


version 15      // Stata version control

clear all          // clear memory

macro drop _all    // delete all macros

set linesize 82    // result window has room for 82 chars in one line

set more off, perm // prevents pause in results windowversion 12


*download our dataset with occupational employment rates 1992-2010 (lfs_occu_rates.dta) and insert it into the data folder
*download our dataset with the ILOs RTI and Offshorability measures (ilo.dta) and insert it into the data folder

*download Visser (2015) data from http://uva-aias.net/en/ictwss and insert "ictwss_v5.1.dta" into data folder renamed to "visser.dta"

*download data from Goos, manning & Salomons (2014) from https://www.aeaweb.org/articles?id=10.1257/aer.104.8.2509 and insert "task.dta" into data folder renamed to "goos.dta"

*download the Comparative Welfare States Dataset (Brady et al. 2014) from http://www.lisdatacenter.org/resources/other-databases/ and insert "CWS-stata.dta" into data folder renamed to "brady.dta"


*********set global paths

/*instructions:
*set up folder structure including a data, tables, and figure folder

*specify globals for paths

global path1 "YOUR_FOLDER_PATH/data"

global path2 "YOUR_FOLDER_PATH/tables"

global path3 "YOUR_FOLDER_PATH/figures"

*from here on the do-file should produce the results and put them in the respective folders

*/

use $path1/lfs_occu_rates.dta, clear

*prepare for merge with Goos et al. (2014) data
rename occu2d occupation
sort country refyear

*we want to keep indicators on routine task intensity and offshorability fomr Goos et al. (2014)
merge m:1 occupation using $path1/goos.dta, keepusing(RTI_alm_isco_77 OFF_gms)  //all matched, as no countries or years in Goos et al. (2014)
drop _merge

*prepare for merging with Visser (2015)
rename refyear year

*create string country variable

gen str country_str=""
replace country_str="Austria" if country==1
replace country_str="Belgium" if country==2
replace country_str="Switzerland" if country==4
replace country_str="Germany" if country==7
replace country_str="Denmark" if country==8
replace country_str="Spain" if country==10
replace country_str="Finland" if country==11
replace country_str="France" if country==12
replace country_str="Greece" if country==13
replace country_str="Ireland" if country==16
replace country_str="Italy" if country==18
replace country_str="Netherlands" if country==23
replace country_str="Norway" if country==24
replace country_str="Portugal" if country==26
replace country_str="Sweden" if country==28
replace country_str="United Kingdom" if country==31

ta country_str country, m //check

rename country country_num
rename country_str country

sort country year

merge m:1 country year using $path1/visser.dta, keepusing(adjcov wc_rights rcb_m rcb_g rs_g rs_m union_rights ext coord)
ta _merge 
*no matches for visser after 2014, no matches for master before 1992 and for some countries (AU, JP, CA, US)
drop _merge


*merge with ILO data
sort country year
merge m:1 country year using $path1/ilo.dta, keepusing(iOffshore iRTI)
*ILO data includes several countries that are not part of master data set, some country-years, too
ta _merge 
drop if _merge!=3
drop _merge

*prepare for merging with Brady et al. (2014) dataset

*yet another country ID variable
gen str id=""
replace id="AUS" if country=="Austria"
replace id="AUL" if country=="Australia"
replace id="BEL" if country=="Belgium"
replace id="CAN" if country=="Canada"
replace id="SWZ" if country=="Switzerland"
replace id="FRG" if country=="Germany"
replace id="DEN" if country=="Denmark"
replace id="SPA" if country=="Spain"
replace id="FIN" if country=="Finland"
replace id="FRA" if country=="France"
replace id="GRE" if country=="Greece"
replace id="IRE" if country=="Ireland"
replace id="ITA" if country=="Italy"
replace id="JPN" if country=="Japan"
replace id="NET" if country=="Netherlands"
replace id="NOR" if country=="Norway"
replace id="NZL" if country=="New Zealand"
replace id="POR" if country=="Portugal"
replace id="SWE" if country=="Sweden"
replace id="USA" if country=="United States"
replace id="UKM" if country=="United Kingdom"

sort id year

merge m:1 id year using $path1/brady.dta, keepusing(postginioecd gdppp tlabfo inempn fcvemp hunemr2 fed leftcab cap100 tradeopen ud)
ta _merge
drop if _merge==2 //some years in Brady et al. (2014) not covered in master data


***********************************************
*****Create variables for analysis*****
***********************************************

*restrict sample to 1970 to 2010

	drop if year>2010 | year<1970


*create DV adjusted coverage

	ta adjcov, m // more than half of the country years (490) are missing, big gaps in 1970s
	ta country year if adjcov==.
	recode adjcov 0=. // two weird years in Greece
	

*create 3 RTI and offshoreability measures (1. based on EULFS occupational data (1992-2010) 2. based on ILO (1970- 3. Merge of the two

*1. generate country-year measures based on EULFS data
gen rti_occemp_perc=RTI_alm_isco_77*occemp_rate //RTI times precentage of employees in occupation
gen off_occemp_perc=OFF_gms*occemp_rate // same procedure for offshorability


bysort country year: egen rte_eu=sum(rti_occemp_perc)
bysort country year: egen off_eu=sum(off_occemp_perc)


replace rte_eu=. if rte_eu==0 
replace off_eu=. if off_eu==0 


* reduce to one observation per country-year
sort country year
drop if country==country[_n-1] & year==year[_n-1]


*2. create measures based on ILO data

ta iRTI, m // only until 2007, France and Italy lots of missing years
ta country year if iRTI==.
rename iRTI rte_ilo

ta iOffshore, m // only until 2007, France and Italy lots of missing years
ta country year if iOffshore==.
rename iOffshore off_ilo

*3. create merged version with priority on EULFS data

*Fill in missing years with crappy data
gen rte_merge=rte_eu
replace rte_merge=rte_ilo if rte_eu==.
ta rte_merge, m

gen off_merge=off_eu
replace off_merge=off_ilo if off_eu==.





*create IV bargaining extension

ta ext, m // 20 missings
ta country year if ext==. //PT, ES. GR in 1970s


*create covariates

*create percentage of workers in industry
gen indperc=inempn/tlabfo 
ta indperc, m //3 missings

*create rate of female employment 
gen femperc=fcvemp/tlabfo 
ta femperc, m //15 missings

*create logged gdp
gen loggdp=log(gdppp)
ta loggdp, m //no missings

*create left cabinet
gen leftgov=leftcab
ta leftgov, m //16 missing

*create federalism
ta fed, m //18 missing

*create work council rights
ta wc_rights, m //28 missing
ta country year if wc_rights==. //Greec & Prtugal until the 1980s

*create unemployment rate
gen unempr=hunemr2
ta unempr, m //no missings

*create strike rights
gen strike_right=(rs_g+rs_m)/2
ta strike_right, m //no missings

*create organization rights
gen org_right=(rcb_g+rcb_m)/2
ta org_right, m //no missings

*create openess to capital
rename cap100 capopen
ta capopen, m //136 missing
ta country year if cap==. //no data since 2005, Ireland missing since 2000

*creat openess to trade
ta tradeopen, m //no missings

*create wage bargaining coordination
ta coord, m //15 missings
ta country year if coord==. //PT & Spain in 1970s

*create union density
ta ud, m //22 missings
ta country year if ud==. //PT, GR & ES in 1970s

*create gini (only for descriptive figures)
rename postginioecd gini
ta gini, m //595 missings


*interpolate to cover missings
*generate indicator for country-years with missingness
gen miss1=1 if adjcov==. | off_merge==. | rte_merge==. | ext==. | indperc==. | ///
	femperc==. | loggdp==. | leftgov==. | fed==. | unempr==. | capopen==. | ///
	tradeopen ==. | wc_rights==. | strike_right==. | org_right==. | coord==. | ud==.

*interpolation	
sort country year
foreach var in adjcov off_merge rte_merge ext indperc femperc loggdp leftgov fed unempr capopen tradeopen wc_rights strike_right org_right coord ud {
	replace `var'=`var'[_n-1] if country==country[_n-1] & `var'==.
	}

gen miss2=1 if adjcov==. | off_merge==. | rte_merge==. | ext==. | indperc==. | ///
	femperc==. | loggdp==. | leftgov==. | fed==. | unempr==. | capopen==. | ///
	tradeopen ==. | wc_rights==. | strike_right==. | org_right==. | coord==. | ud==.
	
encode country, gen(cnt)	
	
keep adjcov capopen cnt coord country ext fed femperc gini id indperc leftgov loggdp ///
 miss1 miss2 off_eu off_ilo off_merge org_right rte_eu rte_ilo rte_merge ///
 strike_right tradeopen ud unempr union_rights wc_rights year	
 
*create standardized variables
foreach var in off_merge rte_merge ext indperc femperc loggdp leftgov fed unempr capopen tradeopen wc_rights strike_right org_right coord ud {
	egen z_`var'=std(`var')
	} 


save $path1/full_data.dta, replace

exit
